### Replication Package for "Why is Intermediating Houses so Difficult? Evidence from iBuyers"
### Buchak, Matvos, Piskorski, and Seru
###
###
### buchak@stanford.edu

### MLS listings analysis. 


library(data.table)
library(lfe)
library(stargazer)
library(survival)
library(zoo)
library(ggplot2)
library(Hmisc)

REPLICATION = T



Table_1B_2A <- function() {
  
  if(REPLICATION) {
    toReg <- fread('../data/processed/mls/share/spells_processed.csv')
  } else {
    toReg <- fread('../data/processed/mls/spells_processed.csv')
  }
  
	toReg[,Lister := as.factor(Lister)]
	toReg[,Lister := factor(Lister,levels = c('Other','iBuyer','Flipper'))]
	
	# Table 1 Panel B: Summary stats for MLS data.
	s1 <-      rbind(toReg[Lister == 'Other',  j=list(type='First list price: Other',N = sum(!is.na(first.listing.price)),m=mean(first.listing.price,na.rm=T),sd = sd(first.listing.price,na.rm=T),p5 = quantile(first.listing.price,.05,na.rm=T), p25 = quantile(first.listing.price,.25,na.rm=T), p50 = quantile(first.listing.price,.5,na.rm=T), p75 = quantile(first.listing.price,.75,na.rm=T), p95 = quantile(first.listing.price,.95,na.rm=T))],
						   toReg[Lister == 'iBuyer', j=list(type='First list price: iBuyer',N = sum(!is.na(first.listing.price)),m=mean(first.listing.price,na.rm=T),sd = sd(first.listing.price,na.rm=T),p5 = quantile(first.listing.price,.05,na.rm=T), p25 = quantile(first.listing.price,.25,na.rm=T), p50 = quantile(first.listing.price,.5,na.rm=T), p75 = quantile(first.listing.price,.75,na.rm=T), p95 = quantile(first.listing.price,.95,na.rm=T))],
						   toReg[Lister == 'Flipper',j=list(type='First list price: Flipper',N = sum(!is.na(first.listing.price)),m=mean(first.listing.price,na.rm=T),sd = sd(first.listing.price,na.rm=T),p5 = quantile(first.listing.price,.05,na.rm=T), p25 = quantile(first.listing.price,.25,na.rm=T), p50 = quantile(first.listing.price,.5,na.rm=T), p75 = quantile(first.listing.price,.75,na.rm=T), p95 = quantile(first.listing.price,.95,na.rm=T))])
	s2 <-		rbind( toReg[Lister == 'Other',  j=list(type='Mentions renovations: Other',N = sum(!is.na(renovation.in.listing)),m=mean(renovation.in.listing,na.rm=T),sd = sd(renovation.in.listing,na.rm=T),p5 = quantile(renovation.in.listing,.05,na.rm=T), p25 = quantile(renovation.in.listing,.25,na.rm=T), p50 = quantile(renovation.in.listing,.5,na.rm=T), p75 = quantile(renovation.in.listing,.75,na.rm=T), p95 = quantile(renovation.in.listing,.95,na.rm=T))],
						   toReg[Lister == 'iBuyer', j=list(type='Mentions renovations: iBuyer',N = sum(!is.na(renovation.in.listing)),m=mean(renovation.in.listing,na.rm=T),sd = sd(renovation.in.listing,na.rm=T),p5 = quantile(renovation.in.listing,.05,na.rm=T), p25 = quantile(renovation.in.listing,.25,na.rm=T), p50 = quantile(renovation.in.listing,.5,na.rm=T), p75 = quantile(renovation.in.listing,.75,na.rm=T), p95 = quantile(renovation.in.listing,.95,na.rm=T))],
						   toReg[Lister == 'Flipper',j=list(type='Mentions renovations: Flipper',N = sum(!is.na(renovation.in.listing)),m=mean(renovation.in.listing,na.rm=T),sd = sd(renovation.in.listing,na.rm=T),p5 = quantile(renovation.in.listing,.05,na.rm=T), p25 = quantile(renovation.in.listing,.25,na.rm=T), p50 = quantile(renovation.in.listing,.5,na.rm=T), p75 = quantile(renovation.in.listing,.75,na.rm=T), p95 = quantile(renovation.in.listing,.95,na.rm=T))])
	#s3 <- 		rbind( toReg[Lister == 'Has Sale: Other',  j=list(type='Other',N = sum(!is.na(total.listings)),m=mean(total.listings,na.rm=T),sd = sd(total.listings,na.rm=T),p5 = quantile(total.listings,.05,na.rm=T), p25 = quantile(total.listings,.25,na.rm=T), p50 = quantile(total.listings,.5,na.rm=T), p75 = quantile(total.listings,.75,na.rm=T), p95 = quantile(total.listings,.95,na.rm=T))],
  #				   toReg[Lister == 'iBuyer', j=list(type='iBuyer',N = sum(!is.na(total.listings)),m=mean(total.listings,na.rm=T),sd = sd(total.listings,na.rm=T),p5 = quantile(total.listings,.05,na.rm=T), p25 = quantile(total.listings,.25,na.rm=T), p50 = quantile(total.listings,.5,na.rm=T), p75 = quantile(total.listings,.75,na.rm=T), p95 = quantile(total.listings,.95,na.rm=T))],
	#				   toReg[Lister == 'Flipper',j=list(type='Flipper',N = sum(!is.na(total.listings)),m=mean(total.listings,na.rm=T),sd = sd(total.listings,na.rm=T),p5 = quantile(total.listings,.05,na.rm=T), p25 = quantile(total.listings,.25,na.rm=T), p50 = quantile(total.listings,.5,na.rm=T), p75 = quantile(total.listings,.75,na.rm=T), p95 = quantile(total.listings,.95,na.rm=T))])
	s4 <- 		rbind( toReg[Lister == 'Other',  j=list(type='Has sale: Other',N = sum(!is.na(has.sale)),m=mean(has.sale,na.rm=T),sd = sd(has.sale,na.rm=T),p5 = quantile(has.sale,.05,na.rm=T), p25 = quantile(has.sale,.25,na.rm=T), p50 = quantile(has.sale,.5,na.rm=T), p75 = quantile(has.sale,.75,na.rm=T), p95 = quantile(has.sale,.95,na.rm=T))],
						   toReg[Lister == 'iBuyer', j=list(type='Has sale: iBuyer',N = sum(!is.na(has.sale)),m=mean(has.sale,na.rm=T),sd = sd(has.sale,na.rm=T),p5 = quantile(has.sale,.05,na.rm=T), p25 = quantile(has.sale,.25,na.rm=T), p50 = quantile(has.sale,.5,na.rm=T), p75 = quantile(has.sale,.75,na.rm=T), p95 = quantile(has.sale,.95,na.rm=T))],
						   toReg[Lister == 'Flipper',j=list(type='Has sale: Flipper',N = sum(!is.na(has.sale)),m=mean(has.sale,na.rm=T),sd = sd(has.sale,na.rm=T),p5 = quantile(has.sale,.05,na.rm=T), p25 = quantile(has.sale,.25,na.rm=T), p50 = quantile(has.sale,.5,na.rm=T), p75 = quantile(has.sale,.75,na.rm=T), p95 = quantile(has.sale,.95,na.rm=T))])
	s5 <- 		rbind( toReg[Lister == 'Other',  j=list(type='Days on market: Other',N = sum(!is.na(days.first.sale.listing)),m=mean(days.first.sale.listing,na.rm=T),sd = sd(days.first.sale.listing,na.rm=T),p5 = quantile(days.first.sale.listing,.05,na.rm=T), p25 = quantile(days.first.sale.listing,.25,na.rm=T), p50 = quantile(days.first.sale.listing,.5,na.rm=T), p75 = quantile(days.first.sale.listing,.75,na.rm=T), p95 = quantile(days.first.sale.listing,.95,na.rm=T))],
						   toReg[Lister == 'iBuyer', j=list(type='Days on market: iBuyer',N = sum(!is.na(days.first.sale.listing)),m=mean(days.first.sale.listing,na.rm=T),sd = sd(days.first.sale.listing,na.rm=T),p5 = quantile(days.first.sale.listing,.05,na.rm=T), p25 = quantile(days.first.sale.listing,.25,na.rm=T), p50 = quantile(days.first.sale.listing,.5,na.rm=T), p75 = quantile(days.first.sale.listing,.75,na.rm=T), p95 = quantile(days.first.sale.listing,.95,na.rm=T))],
						   toReg[Lister == 'Flipper',j=list(type='Days on market: Flipper',N = sum(!is.na(days.first.sale.listing)),m=mean(days.first.sale.listing,na.rm=T),sd = sd(days.first.sale.listing,na.rm=T),p5 = quantile(days.first.sale.listing,.05,na.rm=T), p25 = quantile(days.first.sale.listing,.25,na.rm=T), p50 = quantile(days.first.sale.listing,.5,na.rm=T), p75 = quantile(days.first.sale.listing,.75,na.rm=T), p95 = quantile(days.first.sale.listing,.95,na.rm=T))])
	#s6 <- 		rbind( toReg[Lister == 'Other' & sale.to.first.listing < 1,  j=list(type='Other',N = sum(!is.na(sale.to.first.listing)),m=mean(sale.to.first.listing,na.rm=T),sd = sd(sale.to.first.listing,na.rm=T),p5 = quantile(sale.to.first.listing,.05,na.rm=T), p25 = quantile(sale.to.first.listing,.25,na.rm=T), p50 = quantile(sale.to.first.listing,.5,na.rm=T), p75 = quantile(sale.to.first.listing,.75,na.rm=T), p95 = quantile(sale.to.first.listing,.95,na.rm=T))],
	#					   toReg[Lister == 'iBuyer' & sale.to.first.listing < 1, j=list(type='iBuyer',N = sum(!is.na(sale.to.first.listing)),m=mean(sale.to.first.listing,na.rm=T),sd = sd(sale.to.first.listing,na.rm=T),p5 = quantile(sale.to.first.listing,.05,na.rm=T), p25 = quantile(sale.to.first.listing,.25,na.rm=T), p50 = quantile(sale.to.first.listing,.5,na.rm=T), p75 = quantile(sale.to.first.listing,.75,na.rm=T), p95 = quantile(sale.to.first.listing,.95,na.rm=T))],
	#					   toReg[Lister == 'Flipper' & sale.to.first.listing < 1,j=list(type='Flipper',N = sum(!is.na(sale.to.first.listing)),m=mean(sale.to.first.listing,na.rm=T),sd = sd(sale.to.first.listing,na.rm=T),p5 = quantile(sale.to.first.listing,.05,na.rm=T), p25 = quantile(sale.to.first.listing,.25,na.rm=T), p50 = quantile(sale.to.first.listing,.5,na.rm=T), p75 = quantile(sale.to.first.listing,.75,na.rm=T), p95 = quantile(sale.to.first.listing,.95,na.rm=T))])
	
	ss <- rbind(s1,s2,s4,s5)	
	names(ss) <- c('Variable','N','Mean','S.D.','5%','25%','50%','75%','95%')
	stargazer(ss,summary=F,type='html',out = '../out/tables/1B.html')
	
		
	# Table 2 Panel A.
	pp1  <- felm(log(first.listing.price)                    ~ Lister + log(gla_sqft ) + age  | paste(zip,first.listing.year,listing.month) + renovation.in.listing + age.bucket  +  bedrooms + floors_in_building,data=toReg)
	pp2 <-  felm(renovation.in.listing                       ~ Lister + log(gla_sqft ) + age  | paste(zip,first.listing.year,listing.month)                         + age.bucket  +  bedrooms + floors_in_building,data=toReg)
	pp3  <- felm(total.listings                              ~ Lister + log(gla_sqft ) + age  | paste(zip,first.listing.year,listing.month) + renovation.in.listing + age.bucket  +  bedrooms + floors_in_building,data=toReg)
	pp4  <- felm(has.sale                                    ~ Lister + log(gla_sqft ) + age  | paste(zip,first.listing.year,listing.month) + renovation.in.listing + age.bucket  +  bedrooms + floors_in_building,data=toReg)
	pp5  <- felm(days.first.sale.listing                     ~ Lister + log(gla_sqft ) + age  | paste(zip,first.listing.year,listing.month) + renovation.in.listing + age.bucket  +  bedrooms + floors_in_building,data=toReg[has.sale == 1])
	pp6  <- felm(sale.to.first.listing                       ~ Lister + log(gla_sqft ) + age  | paste(zip,first.listing.year,listing.month) + renovation.in.listing + age.bucket  +  bedrooms + floors_in_building,data=toReg[has.sale == 1 & abs(sale.to.first.listing) < 0.25])
		
	
	# Days between listing and sale
	pp.q  <- felm(days.first.sale.listing                       ~ Lister + log(gla_sqft ) + age + log(first.listing.price)  | paste(zip,first.listing.year,listing.month) + renovation.in.listing + age.bucket  +  bedrooms + floors_in_building,data=toReg[has.sale == 1 & abs(sale.to.first.listing) < 0.25])
	
	
	# Run a survival model too, and do robustness
	data.surv <- toReg
	
	# Sale an be cut short by (1) time runs out, or (2) withdrawn
	# Completed transactions
	data.surv[has.sale == 1,hold.time := as.numeric(sale.date - first.listing.date)]
	data.surv[has.sale.expansive == 1,hold.time.expansive := as.numeric(sale.date.expansive - first.listing.date)]
	data.surv[has.sale == 1,complete.spell := 1]
	data.surv[has.sale.expansive == 1,complete.spell.expansive := 1]
	
	data.surv[has.sale == 0,           hold.time := as.numeric(last.listing.date - first.listing.date)]
	data.surv[has.sale.expansive == 0, hold.time.expansive := as.numeric(next.last.listing.date - first.listing.date)]
	data.surv[has.sale == 0,complete.spell := 0]
	data.surv[has.sale.expansive == 0,complete.spell.expansive := 0]
		
	surv_object <- Surv(time = data.surv$hold.time,event = data.surv$complete.spell)
	surv_object.expansive <- Surv(time = data.surv$hold.time.expansive,event = data.surv$complete.spell.expansive)
	
	
	surv_object.c <- Surv(time = data.surv[has.sale==1]$hold.time,event = data.surv[has.sale==1]$complete.spell)
	surv_object.expansive.c <- Surv(time = data.surv[has.sale.expansive==1]$hold.time.expansive,event = data.surv[has.sale.expansive==1]$complete.spell.expansive)
	
	pm1 <- coxph(formula = surv_object ~ Lister + log(gla_sqft ) + renovation.in.listing + age.bucket + bedrooms + floors_in_building, data = data.surv)
	pm1.a <- coxph(formula = surv_object ~ Lister + log(gla_sqft ) + renovation.in.listing + age.bucket + bedrooms + floors_in_building + log(first.listing.price), data = data.surv)
	pm2 <- coxph(formula = surv_object.c ~ Lister + log(gla_sqft ) + renovation.in.listing + age.bucket + bedrooms + floors_in_building, data = data.surv[has.sale==1])
	pm2.a <- coxph(formula = surv_object.c ~ Lister + log(gla_sqft ) + renovation.in.listing + age.bucket + bedrooms + floors_in_building  + log(first.listing.price), data = data.surv[has.sale==1])
	
	## Extensive margin of listing analysis
	if(REPLICATION) {
	  data.in <- fread('../data/processed/mls/share/corelogic_mls_merged.csv')
	} else {
	  data.in <- fread('../data/processed/mls/corelogic_mls_merged.csv')
	}
	
	
	
	
	
	
	# Create hedonics  # Create bins on house characteristics
	data.in[,landsquarefootage := as.integer(landsquarefootage)]
	data.in[,size.bin := cut(landsquarefootage,breaks = c(0,5000,10000,25000,1000000))]
	data.in[,size.bin := factor(size.bin,levels(size.bin)[c(4,1:3)])]
	  
	data.in[,multistory := as.integer(storiesnum > 1)]
  
	data.in[,qtr := as.yearqtr(date)]
	
	data.in[,price.bin := cut(saleamount,breaks = c(0,100000,250000,500000,1000000,1e999))]
	data.in[,price.bin := factor(price.bin,levels(price.bin)[c(5,1:4)])]

	
	data.in[,house.age := year - as.numeric(yearbuilt)]
	data.in[,age.bin := cut(house.age,breaks = c(0,5,15,50,1000000))]
	data.in[,age.bin := factor(age.bin,levels(age.bin)[c(4,1:3)])]
	
	
	 
	data.in[,saleprice_mls := price]
	data.in[,zip5 := substr(situszipcode,1,5)]
	
	
	data.reg <- data.in[saleamount < 10000000 & saleamount > 5000 & (is.na(saleprice_mls) | saleprice_mls < 10000000) & (iBuyer.buyer == 0 | iBuyer.seller == 0) & propertytype %in% c(10) & landsquarefootage < 30000 & transactiontype == 1 ]
	data.reg <- data.reg[!duplicated(paste(pclidirisfrmtd,date)) & year >= 2013 & year <= 2018]
  data.reg[,landsquarefootage := as.numeric(landsquarefootage)]
	
	## Extensive margin of listing.
	b2 <- felm(week.match ~ iBuyer.buyer + log(landsquarefootage) + house.age |paste(qtr,zip5) + price.bin + age.bin + size.bin + multistory + airconditioning + heating + quality + garage + locationinfluence ,data=data.reg)
	s3 <- felm(week.match ~ iBuyer.seller + log(landsquarefootage)  + house.age |paste(qtr,zip5) + price.bin + age.bin + size.bin + multistory + airconditioning + heating + quality + garage + locationinfluence ,data=data.reg)
	
	# Table 2 Panel A.
	stargazer(b2,s3,pp1,pp2,pp4,pp5,pm1,pm2,pp6,type='html',no.space=T,
	          keep = c('iBuyer.buyer','iBuyer.seller','ListeriBuyer','ListerFlipper'),
	          dep.var.labels = c('Lists on MLS','Log list price','Mentions rennovations','Leads to sale','Days on market','Days on market','Days on market','Sale-to-list price'),
	          model.names = F,column.labels = c('Linear','Linear','Linear','Linear','Linear','Linear','Hazard','Hazard','Linear'),
	          add.lines = list(c('Hedonics','Y','Y','Y','Y','Y','Y','Y','Y','Y'),
	                           c('Zip-qtr FE','Y','Y','Y','Y','Y','Y','Y','Y','Y'),
	                           c('CoreLogic','Y','Y','','','','','','',''),
	                           c('MLS (All)','','','Y','Y','Y','','Y','',''),
	                           c('MLS (Sales)','','','','','','Y','','Y','Y')),
	          omit.stat = c("adj.rsq", "f",'ser'),
	          out = '../out/tables/2A.html')
	
	
	
	
	# Robustness around Relistings (Appendix 4 Panel C).
	# Robustness checking for nearby-listing spells (i.e., failed listing spell and then they relist)
	rr0  <- felm(relists.within.month                        ~ Lister + log(gla_sqft ) + age  | paste(zip,first.listing.year,listing.month) + renovation.in.listing + age.bucket  +  bedrooms + floors_in_building,data=toReg[has.sale == 0])
	rr3  <- felm(total.listings.expansive                    ~ Lister + log(gla_sqft ) + age  | paste(zip,first.listing.year,listing.month) + renovation.in.listing + age.bucket  +  bedrooms + floors_in_building,data=toReg)
	rr4  <- felm(has.sale.expansive                          ~ Lister + log(gla_sqft ) + age  | paste(zip,first.listing.year,listing.month) + renovation.in.listing + age.bucket  +  bedrooms + floors_in_building,data=toReg)
	rr5  <- felm(as.numeric(days.first.sale.listing.expansive) ~ Lister + log(gla_sqft ) + age  | paste(zip,first.listing.year,listing.month) + renovation.in.listing + age.bucket  +  bedrooms + floors_in_building,data=toReg[has.sale.expansive == 1 ])
	rr6  <- felm(sale.to.first.listing.expansive             ~ Lister + log(gla_sqft ) + age  | paste(zip,first.listing.year,listing.month) + renovation.in.listing + age.bucket  +  bedrooms + floors_in_building,data=toReg[has.sale.expansive == 1 & abs(sale.to.first.listing.expansive) < 0.25])
		
	rm1 <- coxph(formula = surv_object.expansive ~ Lister + log(gla_sqft ) + renovation.in.listing + age.bucket + bedrooms + floors_in_building, data = data.surv)
	rm2 <- coxph(formula = surv_object.expansive.c ~ Lister + log(gla_sqft ) + renovation.in.listing + age.bucket + bedrooms + floors_in_building, data = data.surv[has.sale.expansive==1])
	
	# Panel A4C.
	stargazer(rr0,rr3,rr4,rr5,rm1,rm2,rr6,type='html',no.space=T,out='../out/tables/A4C.html',
	          keep = c('ListeriBuyer','ListerFlipper'),
	          dep.var.labels = c('Relists within 30 days','Total listings','Leads to sale','Days-on-market','Days-on-market','Sale-to-list'),
	          model.names = F,column.labels = c('Linear','Linear','Linear','Linear','Hazard','Hazard','Linear'),
	          add.lines = list(c('Hedonic controls','Y','Y','Y','Y','Y','Y','Y'),
	                           c('Zip-Quarter FE','Y','Y','Y','Y','N','N','Y'),
	                           c('Sample','Failed first list','All','All','Sales only','All','Sales only','Sales only')),
	          omit.stat = c('adj.rsq','f','ser'))
		
	
}



Appendix_A3_A4 <- function() {
  if(REPLICATION) {
    data.in <- fread('../data/processed/mls/share/corelogic_mls_merged.csv')
  } else {
    data.in <- fread('../data/processed/mls/corelogic_mls_merged.csv')
  }
  
  
	data.reg <- data.in[(is.na(status) | !(status %in%  c('Withdrawn','Expired','Cancelled'))) & propertytype %in% c(10) &  (iBuyer.buyer == 0 | iBuyer.seller == 0) & saleamount < 1000000 & saleamount > 5000 & !duplicated(paste(pclidirisfrmtd,date))] 
	
	# Appendix A.3.A: Corelogic transactions with an MLS listing
	a3a <- rbind(
		data.reg[,j=list(year = 0,N = .N,ib.buys = sum(iBuyer.buyer,na.rm=T),ib.sells = sum(iBuyer.seller,na.rm=T),dm = mean(day.match),wm = mean(week.match),mm = mean(month.match),qm = mean(qtr.match))],
		data.reg[,j=list(N = .N,ib.buys = sum(iBuyer.buyer,na.rm=T),ib.sells = sum(iBuyer.seller,na.rm=T),dm = mean(day.match),wm = mean(week.match),mm = mean(month.match),qm = mean(qtr.match)),by='year']
	)
	a3a <- a3a[order(year)]
	a3a <- a3a[year %in% c(0,2010:2019)]
	names(a3a) <- c('Year','N','# iBuyer Buys','# iBuyer Sales','Day match','Week match','Month match','Quarter match')
	stargazer(a3a,type='html',summary=F,out='../out/tables/A3A.html')
	
	
	# Appendix A.3.B: Corelogic and MLS sale price consistency
	data.reg[,saleprice_mls := price]
	a3b = rbind(
	data.reg[day.match == 1,j=list(Window = 'All: Day',c_mls_cl = cor(log(saleprice_mls),log(saleamount),use = 'na.or.complete'),exact = mean(saleprice_mls == saleamount,na.rm=T),dev1 = mean(abs(saleprice_mls/saleamount-1)<.01,na.rm=T),dev5 = mean(abs(saleprice_mls/saleamount-1)<.05,na.rm=T),mean_dev = mean(abs(saleprice_mls/saleamount-1),na.rm=T))] ,
	data.reg[week.match == 1,j=list(Window = 'All: Week',c_mls_cl = cor(log(saleprice_mls),log(saleamount),use = 'na.or.complete'),exact = mean(saleprice_mls == saleamount,na.rm=T),dev1 = mean(abs(saleprice_mls/saleamount-1)<.01,na.rm=T),dev5 = mean(abs(saleprice_mls/saleamount-1)<.05,na.rm=T),mean_dev = mean(abs(saleprice_mls/saleamount-1),na.rm=T))] ,
	data.reg[month.match == 1,j=list(Window = 'All: Month',c_mls_cl = cor(log(saleprice_mls),log(saleamount),use = 'na.or.complete'),exact = mean(saleprice_mls == saleamount,na.rm=T),dev1 = mean(abs(saleprice_mls/saleamount-1)<.01,na.rm=T),dev5 = mean(abs(saleprice_mls/saleamount-1)<.05,na.rm=T),mean_dev = mean(abs(saleprice_mls/saleamount-1),na.rm=T))] ,
	data.reg[qtr.match == 1,j=list(Window = 'All: Year',c_mls_cl = cor(log(saleprice_mls),log(saleamount),use = 'na.or.complete'),exact = mean(saleprice_mls == saleamount,na.rm=T),dev1 = mean(abs(saleprice_mls/saleamount-1)<.01,na.rm=T),dev5 = mean(abs(saleprice_mls/saleamount-1)<.05,na.rm=T),mean_dev = mean(abs(saleprice_mls/saleamount-1),na.rm=T))] ,
	data.reg[iBuyer.buyer == 1 & day.match == 1,j=list(Window = 'iBuyer Buys: Day',c_mls_cl = cor(log(saleprice_mls),log(saleamount),use = 'na.or.complete'),exact = mean(saleprice_mls == saleamount,na.rm=T),dev1 = mean(abs(saleprice_mls/saleamount-1)<.01,na.rm=T),dev5 = mean(abs(saleprice_mls/saleamount-1)<.05,na.rm=T),mean_dev = mean(abs(saleprice_mls/saleamount-1),na.rm=T))] ,
	data.reg[iBuyer.buyer == 1 & week.match == 1,j=list(Window = 'iBuyer Buys: Week',c_mls_cl = cor(log(saleprice_mls),log(saleamount),use = 'na.or.complete'),exact = mean(saleprice_mls == saleamount,na.rm=T),dev1 = mean(abs(saleprice_mls/saleamount-1)<.01,na.rm=T),dev5 = mean(abs(saleprice_mls/saleamount-1)<.05,na.rm=T),mean_dev = mean(abs(saleprice_mls/saleamount-1),na.rm=T))] ,
	data.reg[iBuyer.buyer == 1 & month.match == 1,j=list(Window = 'iBuyer Buys: Month',c_mls_cl = cor(log(saleprice_mls),log(saleamount),use = 'na.or.complete'),exact = mean(saleprice_mls == saleamount,na.rm=T),dev1 = mean(abs(saleprice_mls/saleamount-1)<.01,na.rm=T),dev5 = mean(abs(saleprice_mls/saleamount-1)<.05,na.rm=T),mean_dev = mean(abs(saleprice_mls/saleamount-1),na.rm=T))] ,
	data.reg[iBuyer.buyer == 1 & qtr.match == 1,j=list(Window = 'iBuyer Buys: Year',c_mls_cl = cor(log(saleprice_mls),log(saleamount),use = 'na.or.complete'),exact = mean(saleprice_mls == saleamount,na.rm=T),dev1 = mean(abs(saleprice_mls/saleamount-1)<.01,na.rm=T),dev5 = mean(abs(saleprice_mls/saleamount-1)<.05,na.rm=T),mean_dev = mean(abs(saleprice_mls/saleamount-1),na.rm=T))] ,
	data.reg[iBuyer.seller == 1 & day.match == 1,j=list(Window = 'iBuyer Sells: Day',c_mls_cl = cor(log(saleprice_mls),log(saleamount),use = 'na.or.complete'),exact = mean(saleprice_mls == saleamount,na.rm=T),dev1 = mean(abs(saleprice_mls/saleamount-1)<.01,na.rm=T),dev5 = mean(abs(saleprice_mls/saleamount-1)<.05,na.rm=T),mean_dev = mean(abs(saleprice_mls/saleamount-1),na.rm=T))] ,
	data.reg[iBuyer.seller == 1 & week.match == 1,j=list(Window = 'iBuyer Sells: Week',c_mls_cl = cor(log(saleprice_mls),log(saleamount),use = 'na.or.complete'),exact = mean(saleprice_mls == saleamount,na.rm=T),dev1 = mean(abs(saleprice_mls/saleamount-1)<.01,na.rm=T),dev5 = mean(abs(saleprice_mls/saleamount-1)<.05,na.rm=T),mean_dev = mean(abs(saleprice_mls/saleamount-1),na.rm=T))] ,
	data.reg[iBuyer.seller == 1 & month.match == 1,j=list(Window = 'iBuyer Sells: Month',c_mls_cl = cor(log(saleprice_mls),log(saleamount),use = 'na.or.complete'),exact = mean(saleprice_mls == saleamount,na.rm=T),dev1 = mean(abs(saleprice_mls/saleamount-1)<.01,na.rm=T),dev5 = mean(abs(saleprice_mls/saleamount-1)<.05,na.rm=T),mean_dev = mean(abs(saleprice_mls/saleamount-1),na.rm=T))] ,
	data.reg[iBuyer.seller == 1 & qtr.match == 1,j=list(Window = 'iBuyer Sells: Year',c_mls_cl = cor(log(saleprice_mls),log(saleamount),use = 'na.or.complete'),exact = mean(saleprice_mls == saleamount,na.rm=T),dev1 = mean(abs(saleprice_mls/saleamount-1)<.01,na.rm=T),dev5 = mean(abs(saleprice_mls/saleamount-1)<.05,na.rm=T),mean_dev = mean(abs(saleprice_mls/saleamount-1),na.rm=T))] 
	)
	
	names(a3b) <- c('Match window','Cor(log(MLS),log(CoreLogic))','Exact price match','|Deviation|<1%','|Deviation|<5%','Mean(|Deviation|)')
	stargazer(a3b,type='html',summary=F,out='../out/tables/A3B.html')
	print('PLEASE NOTE: When run with sample replication data, not all rows will generate because not all cases occur in the sample data (e.g., iBuyer buys matching within one day)')
	
	
	
	# Parts of appendix 4: iBuyer purchases-to-msl listing match rate by match window.
	#
	#
	# A4AL Match rate by window
	iBuyer.buys = sum(data.reg$iBuyer.buyer)
	all.buys    = nrow(data.reg)
	
	a4a = rbind(
		data.table(Window = 'day'    ,data.reg[day.match==1,j=list(N.IB = sum(iBuyer.buyer),P.IB = 100*sum(iBuyer.buyer)/iBuyer.buys,N = .N,P = 100*.N/all.buys)]),
		data.table(Window = 'week'   ,data.reg[week.match==1,j=list(N.IB = sum(iBuyer.buyer),P.IB = 100*sum(iBuyer.buyer)/iBuyer.buys,N = .N,P = 100*.N/all.buys)]),
		data.table(Window = 'month'  ,data.reg[month.match==1,j=list(N.IB = sum(iBuyer.buyer),P.IB = 100*sum(iBuyer.buyer)/iBuyer.buys,N = .N,P = 100*.N/all.buys)]),
		data.table(Window = 'quarter',data.reg[qtr.match==1,j=list(N.IB = sum(iBuyer.buyer),P.IB = 100*sum(iBuyer.buyer)/iBuyer.buys,N = .N,P = 100*.N/all.buys)])
	)
	
	names(a4a) <- c('Window','# iBuyer buys with listings','% iBuyer buys with listings','# other buys with listings','% other buys with listings')
	stargazer(a4a,type='html',summary=F,out='../out/tables/A4A.html')
	
	

	 
	data.reg <- data.in[status %in% c('Withdrawn','Cancelled','Expired') & saleamount < 1000000 & saleamount > 5000 & (is.na(price) | price < 10000000) & (iBuyer.buyer == 0 | iBuyer.seller == 0) & propertytype %in% c(10) & landsquarefootage < 30000 & transactiontype == 1 ]
	data.reg <- data.reg[!duplicated(paste(pclidirisfrmtd,date)) & year >= 2013 & year <= 2018]
  
  


  # Counts
  da = sum(data.reg[iBuyer.buyer == 1]$date.diff <= 90, na.rm=T)
  ma = sum(data.reg[iBuyer.buyer == 1]$date.diff <= 182,na.rm=T)
  qa = sum(data.reg[iBuyer.buyer == 1]$date.diff <= 365,na.rm=T)  
    
  df = nrow(data.reg[iBuyer.buyer == 1])
  mf = nrow(data.reg[iBuyer.buyer == 1])
  qf = nrow(data.reg[iBuyer.buyer == 1])  
    
  ada = sum(data.reg[iBuyer.buyer == 0]$date.diff <= 90, na.rm=T)
  ama = sum(data.reg[iBuyer.buyer == 0]$date.diff <= 182,na.rm=T)
  aqa = sum(data.reg[iBuyer.buyer == 0]$date.diff <= 365,na.rm=T)  
  
  adf = nrow(data.reg[iBuyer.buyer == 0])
  amf = nrow(data.reg[iBuyer.buyer == 0])
  aqf = nrow(data.reg[iBuyer.buyer == 0])
  
  a4b = rbind(
  	data.table(window = '90 days'  ,N.IB = da,P.IB = da/df, N = ada, P = ada/adf),
  	data.table(window = '182 days' ,N.IB = ma,P.IB = ma/mf, N = ama, P = ama/amf),
  	data.table(window = '365 days' ,N.IB = qa,P.IB = qa/qf, N = aqa, P = aqa/aqf)
  )
	names(a4b) <- c('Window','# iBuyer buys with failed listings','% iBuyer buys with failed listings','# other buys with failed listings','% other buys with failed listings')
  
  stargazer(a4b,type='html',summary=F,out='../out/tables/A4B.html')

}



MLS_CALIBRATION_MOMENTS_1 <- function() {
  if(REPLICATION) {
    data.in <- fread('../data/processed/mls/share/combined_processed.csv')
  } else {
    data.in <- fread('../data/processed/mls/combined_processed.csv')
  }
	
	# Fix dates
	data.in[,date := as.Date(substr(date,start = 1,stop = 10),format = '%Y-%m-%d')]
	data.in[,most_recent_sale_date := as.Date(substr(most_recent_sale_date,start = 1,stop = 10),format = '%Y-%m-%d')]
	
	# Get iBuyers
	match.string <- paste(c('opendoor','open door','\\<od [a-z].*',
                    'offerpad','offer pad',
                    'redfin','red fin',
                    'zillow',
                    'knock'),collapse = '|')
	data.in[,iBuyer.buyer := as.integer(grepl(buyer_office_name,pattern = match.string,ignore.case=T) |
			grepl(buyer_agent_name,pattern = match.string,ignore.case=T)	) ]

	data.in[,iBuyer.seller := as.integer(grepl(listing_agent_name,pattern = match.string,ignore.case=T) |
				grepl(listing_agent_name,pattern = match.string,ignore.case=T) | 
				grepl(owner_full_name,pattern = match.string,ignore.case=T)) ]		
	
	# Create sale spell groupings
	data.in <- data.in[order(cc_property_id,date)]
	
	data.in[,ones := 1]
	data.in[,listing.num := cumsum(ones),by='cc_list_id']
	data.in$ones <- NULL
	
	# Information about the spells	
	data.in[,total.listings := max(listing.num),by='cc_list_id']
	data.in[,total.nonsale.listings := sum(status != 'Sale'),by='cc_list_id']                             # listings that aren't marked as sales
	data.in[,first.listing.price := max( (listing.num == 1) * price),by='cc_list_id']                     # price for the first listing
	data.in[,last.listing.price := max( (listing.num == total.nonsale.listings) * price),by='cc_list_id'] # price for the last listing that isn't a sale
	data.in[,sale.price := max( (status == 'Sale') * price),by='cc_list_id']                              # price on the listing that is marked as a sale
	data.in[,has.sale := max( as.integer(status == 'Sale') ),by='cc_list_id']                             # indicator for one of the listings is marked as a sale
	data.in[has.sale == 0,sale.price := NA]                                                               # make sure, if there's no sale, sale price is NA
	data.in[,iBuyer.buyer := max(iBuyer.buyer,na.rm=T),by='cc_list_id']                                   
	data.in[,iBuyer.seller := max(iBuyer.seller,na.rm=T),by='cc_list_id']
	
	# Get first, last, and sale listing information.
	# The "last" listing is the listing that occurs directly before the sale
	# The "sale" listing is the listing that is marked as a sale
	first.listing <- data.in[listing.num == 1,c('cc_list_id','date')]
	last.listing <- data.in[listing.num == total.nonsale.listings,c('cc_list_id','date')]
	sale.listing <- data.in[status == 'Sale',c('cc_list_id','date')]
	sale.listing <- sale.listing[!duplicated(cc_list_id)]
	names(first.listing) <- c('cc_list_id','first.listing.date')
	names(last.listing) <- c('cc_list_id','last.listing.date')
	names(sale.listing) <- c('cc_list_id','sale.date')
	first.and.last <- merge(first.listing,last.listing,by=c('cc_list_id'))
	first.and.last <- merge(first.and.last,sale.listing,by=c('cc_list_id'),all.x=T)
	
	# Collapse by spell.id
	bySpell <- data.in[listing.num == 1,c('cc_property_id','zip','cc_list_id','date','owner_full_name','buyer_agent_name','buyer_office_name','listing_agent_name','listing_office_name',
		'iBuyer.buyer','iBuyer.seller','city','most_recent_sale','most_recent_sale_date',
		'standardized_property_type','sale_type','mls_days_on_market','market_ppsf','mls_cumulative_days_on_market','renovation.in.listing','has.photos',
		'total.listings','first.listing.price','last.listing.price','has.sale','sale.price',
		'tax_amount','fips_county_cd','high_school_name','heating','cooling','property_condition','listing.length.characters','listing.length.words',
		'rooms','bedrooms','floors_in_building','gla_sqft','lot_sqft','year_built','assessed_year','assessed_total_value'),with=F]

	bySpell <- merge(bySpell,first.and.last,by='cc_list_id')
		
	bySpell[,days.first.last.listing := as.numeric(last.listing.date - first.listing.date)]
	bySpell[,days.last.listing.sale  := as.numeric(sale.date - last.listing.date)]
	bySpell[,days.first.sale.listing := as.numeric(sale.date - first.listing.date)]
	
	bySpell <- bySpell[sale_type == 'Fair Market']
	bySpell[,first.listing.year := year(first.listing.date)]
	bySpell <- bySpell[is.na(most_recent_sale) | most_recent_sale > 0]
	bySpell <- bySpell[is.na(most_recent_sale_date) | as.numeric(first.listing.date - most_recent_sale_date) > 0]
	
	
	# Remove people ``selling'' to themselves...
	bySpell <- bySpell[gsub(listing_office_name,pattern=' ',replace='') != gsub(buyer_office_name,pattern=' ',replace='')]
	
	# Some derived things, like % markup, annualized
	bySpell[,t.last.sale.first.list := as.numeric(first.listing.date - most_recent_sale_date) / 365]
	bySpell[,t.last.sale.last.list := as.numeric(last.listing.date - most_recent_sale_date) / 365]
	bySpell[,t.last.sale.this.sale := as.numeric(sale.date - most_recent_sale_date) / 365]
	
	# Markups: First list to last list. First list to sale. Last list to sale.  Last sale to first list, last sale to this sale.
	bySpell[,last.to.first.listing := last.listing.price / first.listing.price - 1]
	bySpell[,last.to.first.listing.ann := last.to.first.listing / (days.first.last.listing / 365)]
	bySpell[,sale.to.last.listing  := sale.price / last.listing.price - 1]
	bySpell[,sale.to.last.listing.ann  := sale.to.last.listing / (days.last.listing.sale / 365)]
	bySpell[,sale.to.first.listing := sale.price / first.listing.price - 1]
	bySpell[,sale.to.first.listing.ann := sale.to.first.listing / (days.first.sale.listing / 365)]
	bySpell[,first.list.to.last.sale := first.listing.price / most_recent_sale - 1]
	bySpell[,first.list.to.last.sale.ann := first.list.to.last.sale / t.last.sale.first.list]
	bySpell[,sale.to.last.sale := sale.price / most_recent_sale - 1]
	bySpell[,sale.to.last.sale.ann := sale.to.last.sale / t.last.sale.this.sale]
	
	bySpell[,listing.month := month(first.listing.date)]
	bySpell[,sell.month    := month(sale.date)]
	bySpell[,sell.quarter    := quarter(sale.date)]
	bySpell[,listing.quarter := quarter(first.listing.date)]
	
	
	# Remove outliers
	bySpell <- bySpell[(is.na(t.last.sale.first.list) | t.last.sale.first.list > 0) & 
					   first.listing.price > 0 & last.listing.price > 0 & # no 0 listing prices
					   (is.na(sale.price) | (sale.price > 0 & sale.price < 1.5e6)) & # want sale price either not existing or > 0 
					   first.listing.price < 1.5e6 & last.listing.price < 1.5e6 & gla_sqft < 10000] # Don't want unusually high prices
	
	# Other derived data
	bySpell[,most_recent_sale_year := year(most_recent_sale_date)]
	bySpell[,age := first.listing.year - year_built]
	bySpell[,age.bucket := cut(age,breaks = c(-999,0,5,10,15,25,50,100,1000))]
	
	bySpell[,size.bin := cut(gla_sqft,breaks = c(0,5000,10000,25000,1000000))]
	bySpell[,size.bin := factor(size.bin,levels(size.bin)[c(4,1:3)])]
	
	# Check whether  listings have nearby listings...
	bySpell <- bySpell[order(cc_property_id,first.listing.date)]
	bySpell[,next.listing.date := shift(first.listing.date,1,type='lead'),by='cc_property_id']
	bySpell[,next.last.listing.date := shift(last.listing.date,1,type='lead'),by='cc_property_id']
	bySpell[,relists.within.month := 0]
	bySpell[next.listing.date - last.listing.date < 30 & has.sale == 0, relists.within.month := 1]
	bySpell[next.listing.date - last.listing.date < 30 & has.sale == 0, relists.within.month := 1]
	bySpell[,next.has.sale := shift(has.sale,1,type='lead'),by='cc_property_id']
	bySpell[,next.total.listings  := shift(total.listings,1,type='lead'),by='cc_property_id']
	bySpell[,next.sale.date := shift(sale.date,1,type='lead'),by='cc_property_id']
	bySpell[,next.sale.price  := shift(sale.price,1,type='lead'),by='cc_property_id']
	
	
	
	bySpell[relists.within.month == 0, has.sale.expansive := has.sale ]
	bySpell[relists.within.month == 0, total.listings.expansive := total.listings]
	bySpell[relists.within.month == 0, sale.price.expansive := sale.price]
	bySpell[relists.within.month == 0, days.first.sale.listing.expansive := sale.date - first.listing.date]
	bySpell[relists.within.month == 0, sale.to.first.listing.expansive := sale.to.first.listing]
	bySpell[relists.within.month == 0, last.listing.date.expansive := last.listing.date ]
	bySpell[relists.within.month == 0, sale.date.expansive := sale.date ]
	
	bySpell[relists.within.month == 1, has.sale.expansive := next.has.sale ]
	bySpell[relists.within.month == 1, total.listings.expansive := next.total.listings + total.listings]
	bySpell[relists.within.month == 1, sale.price.expansive := next.sale.price]
	bySpell[relists.within.month == 1, days.first.sale.listing.expansive := next.sale.date - first.listing.date]
	bySpell[relists.within.month == 1, sale.to.first.listing.expansive := I(sale.price.expansive / first.listing.price)]
	bySpell[relists.within.month == 1, last.listing.date.expansive := next.last.listing.date ]
	bySpell[relists.within.month == 1, sale.date.expansive := next.sale.date ]
	
	
	# Some sensitivty analysis around this
	bySpell[,Lister := 'Other']
	bySpell[t.last.sale.first.list < 1, Lister := 'Flipper']
	bySpell[iBuyer.seller == 1,Lister := 'iBuyer']
	bySpell[,Lister := factor(Lister)]
	bySpell[,Lister := factor(Lister,levels(Lister)[c(3,1,2)])]
	bySpell[,qtr := quarter(first.listing.date)]
	
	# cl = censoring levels
	cl = 0.99
	ll = quantile(abs(bySpell$last.to.first.listing),cl,na.rm=T)
	aa = quantile(abs(bySpell$last.to.first.listing.ann),cl,na.rm=T)
	gg = quantile(abs(bySpell$first.list.to.last.sale),.95,na.rm=T)
	tt = quantile(abs(bySpell$total.listings),.99,na.rm=T)
	
	toReg <- bySpell[first.listing.year %in% 2013:2018 & first.listing.price > 0]
	toReg[,Lister := factor(Lister,levels = c('Other','Flipper','iBuyer'))]
	
	
	
	# iBuyer and Regular seller time on market.
	MOMENT_tom.ib    = mean(toReg[has.sale == 1 & Lister == 'iBuyer']$days.first.sale.listing,na.rm=T)
	MOMENT_tom.hh    = mean(toReg[has.sale == 1 & Lister != 'iBuyer']$days.first.sale.listing,na.rm=T)
	
	# Price reduction
	MOMENT_p.reduce.prices = mean(toReg[Lister == 'iBuyer' & has.sale == 1]$last.to.first.listing != 0)


	moments <- data.table(name =  c('p.reduce.prices','tom.ib','tom.hh'),
						  value = c( MOMENT_p.reduce.prices,  MOMENT_tom.ib,  MOMENT_tom.hh) )
	
	write.table(x=moments,file =  '../out/tables/MLS_CALIBRATION_MOMENTS_1.csv',row.names=F,col.names=T)
}



MLS_CALIBRATION_MOMENTS_2 <- function() {
  # Generates "Impatient dPrice" and "Impatient dDays" used in structural model.
  
  if(REPLICATION) {
    data.in <- fread('../data/processed/mls/share/spells_processed.csv')
  } else {
    data.in <- fread('../data/processed/mls/spells_processed.csv')
  }
  
  # Run hedonic model and try to ID who is under listing, but, annoyingly, we have to merge this result back onto the main dataset for various reasons, which is 
  #hard to do with LFE.
  data.in$GREGS_ID = 1:nrow(data.in)
  data.in <- data.in[has.sale == 1 & first.listing.year %in% 2010:2013 & Lister == 'Other' & first.listing.price > 100000 & 
                       most_recent_sale > 50000 & (first.listing.year - most_recent_sale_year) <= 5 & total.listings  == 2]
  #data.in[,last.sale.year := as.numeric(substr(most_recent_sale_date,1,4))]
  
  
  data.in[,tax_amount_w := tax_amount]
  data.in[tax_amount > quantile(data.in$tax_amount,.99,na.rm=T),tax_amount_w := quantile(data.in$tax_amount,.99,na.rm=T)]
  
  data.in[,gla_sqft_w := gla_sqft]
  data.in[gla_sqft > quantile(data.in$gla_sqft,.99,na.rm=T),gla_sqft_w := quantile(data.in$gla_sqft,.99,na.rm=T)]
  
  data.in[,lot_sqft_w := lot_sqft]
  data.in[lot_sqft > quantile(data.in$lot_sqft,.99,na.rm=T),lot_sqft_w := quantile(data.in$lot_sqft,.99,na.rm=T)]
  
  data.in[,most_recent_sale_w := most_recent_sale]
  data.in[most_recent_sale > quantile(data.in$most_recent_sale,.99,na.rm=T),most_recent_sale_w := quantile(data.in$most_recent_sale,.99,na.rm=T)]
  
  pp1  <- felm(log(first.listing.price)                    ~ log(most_recent_sale_w) * I(first.listing.year - most_recent_sale_year) +  log(tax_amount_w) +  GREGS_ID | 
                 paste(zip,first.listing.year,listing.month)+ renovation.in.listing ,keepX = T,data=data.in)
  
  pp2  <- felm(days.first.sale.listing                    ~ log(most_recent_sale_w) * I(first.listing.year - most_recent_sale_year) +  log(tax_amount_w) +  GREGS_ID | 
                 paste(zip,first.listing.year,listing.month)+ renovation.in.listing ,keepX = T,data=data.in)
  
  
  
  
  
  to.merge1 <- data.table(GREGS_ID = as.numeric(pp1$X[,'GREGS_ID']),listing_residual = as.numeric(pp1$residual))
  to.merge2 <- data.table(GREGS_ID = as.numeric(pp2$X[,'GREGS_ID']),dom_residual = as.numeric(pp2$residual))
  to.merge <- merge(to.merge1,to.merge2,by='GREGS_ID')
  
  merged.data <- merge(data.in,to.merge,by='GREGS_ID')
  merged.data[,sale.bin := ave(days.first.sale.listing,cut2(days.first.sale.listing,g=2))]
  
  lower.half.price = median(merged.data[dom_residual < median(merged.data$dom_residual,na.rm=T)]$listing_residual)
  upper.half.price = median(merged.data[dom_residual > median(merged.data$dom_residual,na.rm=T)]$listing_residual)
  lower.half.tom   = median(merged.data[dom_residual < median(merged.data$dom_residual,na.rm=T)]$dom_residual)
  upper.half.tom   = median(merged.data[dom_residual > median(merged.data$dom_residual,na.rm=T)]$dom_residual)
  
  dPrice = upper.half.price - lower.half.price
  dDays  = upper.half.tom   - lower.half.tom
  
  moments = data.table(moment = c('dPrice','dDays'),
                       values = c(dPrice,dDays))
  
  
  write.table(x=moments,file =  '../out/tables/MLS_CALIBRATION_MOMENTS_2.csv',row.names=F,col.names=T)
  
}



get_coefficients_for_liquidity_prediction <- function() {
	
  if(REPLICATION) {
    toReg <- fread('../data/processed/mls/share/spells_processed.csv')
  } else {
    toReg <- fread('../data/processed/mls/spells_processed.csv')
  }
  
	# Outcome is: do you sell the house within 3 months of listing?
	toReg[,sale.lt3 := 0]
	toReg[!is.na(sale.date) & as.integer(sale.date - first.listing.date) < 90, sale.lt3 := 1]


	tr <- toReg[!is.na(most_recent_sale) & !is.na(lot_sqft) & !is.na(gla_sqft) & !is.na(age) & !is.na(floors_in_building)]
	tr <- tr[most_recent_sale < 1.5e6 & lot_sqft < 8e4 & gla_sqft < 1e5 & age < 100 & most_recent_sale > 0 & lot_sqft > 0 & gla_sqft > 0 & age > 0]
	
	tr[,mr1 := log(most_recent_sale)]
	tr[,mr2 := log(most_recent_sale)^2]
	tr[,mr3 := log(most_recent_sale)^3]
	
	tr[,ls1 := log(lot_sqft)]
	tr[,ls2 := log(lot_sqft)^2]
	tr[,ls3 := log(lot_sqft)^3]
	
	tr[,is1 := log(gla_sqft)]
	tr[,is2 := log(gla_sqft)^2]
	tr[,is3 := log(gla_sqft)^3]
	
	tr[,ha1 := log(age)]
	tr[,ha2 := log(age)^2]
	tr[,ha3 := log(age)^3]
	
	
	# Model is regressing hedonics on this.
	mm <- lm(sale.lt3 ~ mr1 + mr2 + mr3 + ls1 + ls2 + ls3 + is1 + is2 + is3 + ha1 + ha2 + ha3 + I(floors_in_building > 1),data=tr[first.listing.year %in% 2013:2017])
	
	
	# This goes right into the liquidity prediction table in the main code.
	stargazer(summary(mm)$coefficients,summary=F,type='text',out='../out/tables/Liquidity_prediction_coefficients.txt')
}


Table_1B_2A()
Appendix_A3_A4()
MLS_CALIBRATION_MOMENTS_1()
MLS_CALIBRATION_MOMENTS_2()
get_coefficients_for_liquidity_prediction()
